
@InProceedings{VieiraOliv:2021:GaEsVi,
               author = "Vieira, Gabriel Lefundes and Oliveira, Luciano",
           affiliation = "{Federal University of Bahia} and {Federal University of Bahia}",
                title = "Gaze estimation via self-attention augmented convolutions",
            booktitle = "Proceedings...",
                 year = "2021",
               editor = "Paiva, Afonso and Menotti, David and Baranoski, Gladimir V. G. and 
                         Proen{\c{c}}a, Hugo Pedro and Junior, Antonio Lopes Apolinario 
                         and Papa, Jo{\~a}o Paulo and Pagliosa, Paulo and dos Santos, 
                         Thiago Oliveira and e S{\'a}, Asla Medeiros and da Silveira, 
                         Thiago Lopes Trugillo and Brazil, Emilio Vital and Ponti, Moacir 
                         A. and Fernandes, Leandro A. F. and Avila, Sandra",
         organization = "Conference on Graphics, Patterns and Images, 34. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "deep learning, gaze estimation, attention-augmented 
                         convolutions.",
              abstract = "Although deep learning methods have recently boosted the accuracy
                          of appearance-based gaze estimation, there is still room for
                          improvement in the network architectures for this particular task.
                          Hence, we propose a novel network architecture grounded on
                          self-attention augmented convolutions to improve the quality of
                          the learned features during the training of a shallower residual
                          network. The rationale is that the self-attention mechanism can
                          help outperform deeper architectures by learning dependencies
                          between distant regions in full-face images. This mechanism can
                          also create better and more spatially aware feature representations
                          derived from the face and eye images before gaze regression. We
                          dubbed our framework ARes-gaze, which explores our
                          Attention-augmented ResNet (ARes-14) as twin convolutional
                          backbones. In our experiments, results showed a 2.38% decrease in
                          the average angular error compared to state-of-the-art methods on
                          the MPIIFaceGaze data set, and a second-place result on the
                          EyeDiap data set. It is noteworthy that our proposed framework was
                          the only one to reach high accuracy simultaneously on both data
                          sets.",
  conference-location = "Gramado, RS, Brazil (virtual)",
      conference-year = "18-22 Oct. 2021",
                  doi = "10.1109/SIBGRAPI54419.2021.00016",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/45CPHC5",
                  url = "http://urlib.net/ibi/8JMKD3MGPEW34M/45CPHC5",
           targetfile = "gaze_attention_sibgrapi_2021_CAMERA_READY(1).pdf",
        urlaccessdate = "2024, May 06"
}
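
For readers of this record, the sketch below illustrates the attention-augmented convolution idea named in the title: the output channels of a standard convolution are concatenated with multi-head self-attention computed over the flattened spatial grid, so the layer can relate distant image regions. This is a minimal, hypothetical PyTorch sketch, not the authors' ARes-14/ARes-gaze code; the module name, channel sizes, and the omission of relative position encodings are assumptions made for illustration only.

    # Hypothetical sketch of a self-attention augmented convolution (AAConv-style).
    # NOT the authors' ARes-14 implementation; sizes and names are illustrative.
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class AugmentedConv2d(nn.Module):
        """Concatenate a standard convolution with multi-head self-attention
        computed over the flattened H*W spatial positions."""
        def __init__(self, in_ch, out_ch, attn_ch, heads=4, kernel_size=3):
            super().__init__()
            assert attn_ch % heads == 0 and attn_ch < out_ch
            # Convolutional branch fills the channels not produced by attention.
            self.conv = nn.Conv2d(in_ch, out_ch - attn_ch, kernel_size,
                                  padding=kernel_size // 2)
            # 1x1 convolution produces queries, keys, and values in one pass.
            self.qkv = nn.Conv2d(in_ch, 3 * attn_ch, kernel_size=1)
            self.heads = heads
            self.attn_ch = attn_ch

        def forward(self, x):
            b, _, h, w = x.shape
            conv_out = self.conv(x)                    # (b, out_ch - attn_ch, h, w)
            q, k, v = self.qkv(x).chunk(3, dim=1)      # each (b, attn_ch, h, w)

            def split(t):                              # -> (b, heads, h*w, attn_ch/heads)
                return t.reshape(b, self.heads, self.attn_ch // self.heads,
                                 h * w).transpose(2, 3)

            q, k, v = split(q), split(k), split(v)
            scale = (self.attn_ch // self.heads) ** -0.5
            # Scaled dot-product attention over all spatial positions.
            attn = F.softmax(q @ k.transpose(-2, -1) * scale, dim=-1)
            attn_out = (attn @ v).transpose(2, 3).reshape(b, self.attn_ch, h, w)
            # Concatenate convolutional and attentional feature maps.
            return torch.cat([conv_out, attn_out], dim=1)

    # Usage on a downsampled feature map, as such a layer would sit inside a
    # residual block rather than on the raw face image (sizes are assumptions).
    layer = AugmentedConv2d(in_ch=64, out_ch=128, attn_ch=32, heads=4)
    features = layer(torch.randn(2, 64, 28, 28))       # -> (2, 128, 28, 28)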

